Libraries needed: [Note: I wrote results hide but the ## are not results maybe??]
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(Seurat)
## Attaching SeuratObject
library(patchwork)
Loading data & Checking out how the loaded data looks like:
mouse.data <- Read10X(data.dir = "/Users/xiaoyizheng/Downloads/Praktikum/Bioinfo_Einarbeiten/CellChatSelf/mouse/GSE113854_RAW/")
mouse.data
## [[ suppressing 32 column names 'AAACCTGAGAATTCCC-1', 'AAACCTGAGACCGGAT-1', 'AAACCTGAGACGCACA-1' ... ]]
## [[ suppressing 32 column names 'AAACCTGAGAATTCCC-1', 'AAACCTGAGACCGGAT-1', 'AAACCTGAGACGCACA-1' ... ]]
## [[ suppressing 32 column names 'AAACCTGAGAATTCCC-1', 'AAACCTGAGACCGGAT-1', 'AAACCTGAGACGCACA-1' ... ]]
Creating Seurat object:
mouse <- CreateSeuratObject(counts = mouse.data, project = "MoSk", min.cells = 3, min.features = 200)
QC (quality control) Matrix for selection and filtration of cells, using user-defined criteria. In this case it is the mitochondrial genes as criteria. [note: apparently they all have same level of mt content. Maybe pre-processed already? or maybe that is not the proper criteria to choose in this case. Which one could be better instead of it?]
mouse[["percent.mt"]] <- PercentageFeatureSet(mouse, pattern = "^MT-")
#Visualization by violin plot (mt percent no significance):
VlnPlot(mouse, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning in SingleExIPlot(type = type, data = data[, x, drop = FALSE], idents =
## idents, : All cells have the same value of percent.mt.
Genes and total molecules calculated and stored in obj. meta data.
Showing QC metrics for the first 5 cells:
head(mouse@meta.data, 5)
## orig.ident nCount_RNA nFeature_RNA percent.mt
## AAACCTGAGAATTCCC-1 MoSk 1475 783 0
## AAACCTGAGACCGGAT-1 MoSk 4148 1624 0
## AAACCTGAGACGCACA-1 MoSk 3136 1342 0
## AAACCTGAGATGTGGC-1 MoSk 3138 1323 0
## AAACCTGAGCTGCGAA-1 MoSk 4029 1721 0
#examining a few genes in the first thirty cells
mouse.data[c("Igf2", "Cck", "Pf4"), 1:30]
## 3 x 30 sparse Matrix of class "dgCMatrix"
## [[ suppressing 30 column names 'AAACCTGAGAATTCCC-1', 'AAACCTGAGACCGGAT-1', 'AAACCTGAGACGCACA-1' ... ]]
##
## Igf2 . . . . . 1 . . . . . . . . . . . 1 . . . . . . . . . . . .
## Cck . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
## Pf4 . . . . . . . . 11 . . . . . 25 . . 1 1 . 1 1 . 1 . . . . . 2
#a few genes in the first 30 cells
#picked "Igf2", "Cck", "Pf4" according to web
plot1 <- FeatureScatter(mouse, feature1 = "nCount_RNA", feature2 = "percent.mt")
## Warning in cor(x = data[, 1], y = data[, 2]): the standard deviation is zero
#since all have same mt quality...
plot2 <- FeatureScatter(mouse, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
#positively correlated
plot1 + plot2
since it is already normalized => this step omitted
mouse <- FindVariableFeatures(mouse, selection.method = "vst", nfeatures = 2000)
top10 <- head(VariableFeatures(mouse), 10) # Identify the 10 most highly variable genes
top10
## [1] "Hbb-bs" "Hba-a1" "Ccl21a" "Hbb-bt" "Saa3" "Acp5" "Ccl5" "S100a8"
## [9] "Mmp9" "S100a9"
plot1 <- VariableFeaturePlot(mouse)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot1 + plot2
all.genes <- rownames(mouse)
mouse <- ScaleData(mouse, features = all.genes)
## Centering and scaling data matrix
#optional: mouse <- ScaleData(mouse)
mouse <- RunPCA(mouse, features = VariableFeatures(object = mouse))
## PC_ 1
## Positive: Sparc, Bgn, Col1a2, Col1a1, Col3a1, Col6a1, Lgals1, Serpinh1, Col5a2, Fstl1
## Tmsb10, Col6a2, Col5a1, Col12a1, Aebp1, Dcn, Postn, Meg3, Fn1, Col6a3
## Mmp2, Fbln2, Lum, S100a6, Fosb, Loxl1, Thbs2, Serpinf1, Lrrc15, Tuba1a
## Negative: Tyrobp, Fcer1g, Lyz2, C1qb, Apoe, Ctss, C1qa, C1qc, Ms4a7, Aif1
## Cd52, Pf4, Wfdc17, Laptm5, Fth1, Cd74, Ms4a6c, Lgals3, Clec4n, Cd68
## Lst1, Alox5ap, H2-Aa, Ptpn18, Ucp2, Id2, AF251705, Trem2, Fcgr3, Clec4a2
## PC_ 2
## Positive: Col1a1, Col1a2, Col3a1, Mfap4, Lum, Postn, Col5a2, Col12a1, Dcn, Thbs2
## Igf1, Aebp1, Mdk, Mmp2, Itm2a, Col5a1, Spon1, Lox, Ptn, Olfml3
## Dpt, Cthrc1, Fndc1, Aspn, Serpinf1, Lrrc15, Col6a2, Mfap5, Col6a1, Prss35
## Negative: Cdh5, Col4a1, Col4a2, Pecam1, Egfl7, Igfbp7, Cd93, Crip2, Cav1, Ramp2
## Plvap, Vim, Col15a1, Myct1, Kdr, Adgrf5, Esam, Pdlim1, Emcn, Ctla2a
## Ecscr, Sparcl1, Col18a1, Eng, Adgrl4, Mcam, Itga6, Gimap6, Ehd4, F11r
## PC_ 3
## Positive: Sparcl1, Rgs5, Il6, Tinagl1, Igfbp7, Col4a1, Col4a2, Gm13889, Fabp4, Ebf1
## Ly6c1, Sept4, Emcn, Adgrf5, Ndufa4l2, Pecam1, Esam, Adgrl4, Flt1, Notch3
## Adamts9, Epas1, Des, Bcr, Vtn, Mcam, 2200002D01Rik, Higd1b, Apold1, Cygb
## Negative: Ftl1, Tyrobp, Fth1, Fcer1g, Lyz2, Cyba, Lgals3, Apoe, C1qb, Ctss
## C1qc, C1qa, Cstb, Laptm5, Tmsb4x, Lst1, Ctsd, Pf4, Ms4a7, Cd68
## Lgmn, Gm10116, Clec4n, Aif1, Wfdc17, Csf1r, Id2, Alox5ap, Ms4a6c, Ucp2
## PC_ 4
## Positive: Rgs5, Ndufa4l2, Gm13889, Crip1, Acta2, Higd1b, Des, Notch3, Cox4i2, Thy1
## Mylk, Pdgfa, S100a4, Serpine2, Ppp1r14a, Il6, Myl9, Rasgrp2, Ebf1, Tuba1b
## 2810417H13Rik, Cygb, Stmn1, Mgp, Ube2c, Mustn1, Cks2, Col4a1, Tpm1, H2afz
## Negative: Mfap4, Cdh5, Pecam1, Igf1, Col1a2, Cd34, Cd93, Ptn, Col3a1, Col1a1
## Ramp2, S100a16, Egfl7, Mest, Lum, Myct1, Dpt, Thbs2, Mdk, Itm2a
## Aebp1, Col14a1, Kdr, Ly6c1, Ogn, Ctla2a, C1qtnf3, Adgrl4, Emcn, Plvap
## PC_ 5
## Positive: Birc5, Stmn1, H2afz, Ube2c, Top2a, Hmgb2, Tuba1b, 2810417H13Rik, Cenpa, Cks2
## Cdca3, 2700094K13Rik, Cks1b, Ccnb2, Tubb5, Hmgb1, Cdk1, H2afx, Smc2, Lockd
## Ccnb1, Cdca8, Ccna2, Prc1, Ccdc34, H2afv, Ube2s, Hmgn2, Nusap1, Spc24
## Negative: Rgs5, Cxcl1, Gm13889, Il6, Meg3, Serping1, Errfi1, Serpine2, Mylk, Col4a1
## Notch3, Ndufa4l2, Ccl2, Ebf1, Mt1, Col4a2, Des, Neat1, Phlda1, Cygb
## Dnajb1, Malat1, Apold1, Col18a1, Tnfaip2, Adamts4, Higd1b, Nr4a1, Sparcl1, Bcr
print(mouse[["pca"]], dims = 1:5, nfeatures = 5)
## PC_ 1
## Positive: Sparc, Bgn, Col1a2, Col1a1, Col3a1
## Negative: Tyrobp, Fcer1g, Lyz2, C1qb, Apoe
## PC_ 2
## Positive: Col1a1, Col1a2, Col3a1, Mfap4, Lum
## Negative: Cdh5, Col4a1, Col4a2, Pecam1, Egfl7
## PC_ 3
## Positive: Sparcl1, Rgs5, Il6, Tinagl1, Igfbp7
## Negative: Ftl1, Tyrobp, Fth1, Fcer1g, Lyz2
## PC_ 4
## Positive: Rgs5, Ndufa4l2, Gm13889, Crip1, Acta2
## Negative: Mfap4, Cdh5, Pecam1, Igf1, Col1a2
## PC_ 5
## Positive: Birc5, Stmn1, H2afz, Ube2c, Top2a
## Negative: Rgs5, Cxcl1, Gm13889, Il6, Meg3
VizDimLoadings(mouse, dims = 1:2, reduction = "pca")
DimPlot(mouse, reduction = "pca")
heatmap: exploration of the primary sources of heterogeneity -> decide which PCs to include for further downstream analyses
DimHeatmap(mouse, dims = 1, cells = 500, balanced = TRUE) #heatmap: exploration of the primary sources of heterogeneity
DimHeatmap(mouse, dims = 1:15, cells = 500, balanced = TRUE)
# VII. Determination of Dimension Metafeature combines information
across a correlated feature set. top principal components => robust
compression how many PCs to take? ###JackStraw procedure permute subset
of the data (1% by default) and rerun PCA significant PCs ~ strong
enrichment of low p-value features
mouse <- JackStraw(mouse, num.replicate = 100)
mouse <- ScoreJackStraw(mouse, dims = 1:20)
JackStrawPlot(mouse, dims = 1:15)
## Warning: Removed 21000 rows containing missing values (`geom_point()`).
alternative heuristic method Elbow plot: observe where the elbow is and then see which PCs to choose
ElbowPlot(mouse)
# VIII. Clustering Cells
mouse <- FindNeighbors(mouse, dims = 1:10)
## Computing nearest neighbor graph
## Computing SNN
#mouse <- FindClusters(mouse, resolution = 0.5)
mouse <- FindClusters(mouse, resolution = 0.4)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 22322
## Number of edges: 704154
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9112
## Number of communities: 12
## Elapsed time: 3 seconds
# Look at cluster IDs of the first 5 cells
head(Idents(mouse), 5)
## AAACCTGAGAATTCCC-1 AAACCTGAGACCGGAT-1 AAACCTGAGACGCACA-1 AAACCTGAGATGTGGC-1
## 2 1 4 1
## AAACCTGAGCTGCGAA-1
## 7
## Levels: 0 1 2 3 4 5 6 7 8 9 10 11
input to the UMAP and tSNE: same PCs as input to the clustering analysis
mouse <- RunUMAP(mouse, dims = 1:10)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 10:31:29 UMAP embedding parameters a = 0.9922 b = 1.112
## 10:31:29 Read 22322 rows and found 10 numeric columns
## 10:31:29 Using Annoy for neighbor search, n_neighbors = 30
## 10:31:29 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 10:31:32 Writing NN index file to temp file /var/folders/w_/82bj411144v93vqxt1tbsg9h0000gn/T//Rtmp4v9dGB/filec535721e914b
## 10:31:32 Searching Annoy index using 1 thread, search_k = 3000
## 10:31:41 Annoy recall = 100%
## 10:31:41 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 10:31:42 Initializing from normalized Laplacian + noise (using irlba)
## 10:31:43 Commencing optimization for 200 epochs, with 905958 positive edges
## 10:31:57 Optimization finished
#note that you can set `label = TRUE` or use the LabelClusters function to help label individual clusters
DimPlot(mouse, reduction = "umap")
#saveRDS(mouse, file = "/Users/xiaoyizheng/Downloads/Bioinfo_Einarbeiten/CellChatSelf/mouse/output/mouseOutput.rds")
cluster2.markers <- FindMarkers(mouse, ident.1 = 2, min.pct = 0.25)
head(cluster2.markers, n = 5)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Col3a1 0 -1.8095141 0.993 0.963 0
## Des 0 0.6945509 0.463 0.054 0
## Serpine2 0 1.1771494 0.883 0.388 0
## Rgs5 0 2.2070748 0.957 0.216 0
## Rgs4 0 0.3445950 0.256 0.049 0
cluster5.markers <- FindMarkers(mouse, ident.1 = 5, ident.2 = c(0, 3), min.pct = 0.25)
head(cluster5.markers, n = 5)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Fcer1g 0 1.2996233 0.699 0.154 0
## Il1b 0 1.5218234 0.721 0.269 0
## Ctss 0 0.6950059 0.416 0.047 0
## Laptm5 0 0.5755497 0.395 0.043 0
## Cd52 0 1.1293071 0.659 0.114 0
mouse.markers <- FindAllMarkers(mouse, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25) #No features pass logfc.threshold threshold, maybe threshold=0.1, 0.15, 0.2?
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
## Calculating cluster 10
## Calculating cluster 11
mouse.markers <- FindAllMarkers(mouse, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.2) #0.2 is okki
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
## Calculating cluster 10
## Calculating cluster 11
mouse.markers %>%
group_by(cluster) %>%
slice_max(n = 2, order_by = avg_log2FC)
## # A tibble: 24 × 7
## # Groups: cluster [12]
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 1.29e-124 0.281 0.761 0.535 2.21e-120 0 Itm2a
## 2 1.24e-134 0.211 0.868 0.586 2.12e-130 0 Mfap4
## 3 3.32e-139 1.56 0.357 0.182 5.67e-135 1 Saa3
## 4 0 1.17 0.909 0.509 0 1 Crabp1
## 5 0 2.21 0.957 0.216 0 2 Rgs5
## 6 0 1.38 0.974 0.364 0 2 Col4a1
## 7 0 1.77 0.919 0.429 0 3 Ptn
## 8 0 1.62 0.821 0.327 0 3 H19
## 9 0 2.51 0.892 0.193 0 4 Ctla2a
## 10 0 1.73 0.962 0.381 0 4 Col4a1
## # … with 14 more rows
cluster0.markers <- FindMarkers(mouse, ident.1 = 0, logfc.threshold = 0.25, test.use = "roc", only.pos = TRUE)
cluster0.markers <- FindMarkers(mouse, ident.1 = 0, logfc.threshold = 0.2, test.use = "roc", only.pos = TRUE)
VlnPlot(mouse, features = c("Rgs5", "Tyrobp"))
VlnPlot(mouse, features = c("Col1a1", "Col1a2"), slot = "counts", log = TRUE) #plot raw counts
cluster0.markers <- FindMarkers(mouse, ident.1 = 0, min.pct = 0.25)
head(cluster0.markers, n = 5)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## B2m 0 -0.9270665 0.724 0.909 0
## Tmsb4x 0 -1.0219734 0.988 0.999 0
## Ftl1 0 -1.1787538 0.995 0.998 0
## Col4a1 0 -1.4380142 0.181 0.487 0
## Nfkbia 0 -0.7260907 0.776 0.909 0
VlnPlot(mouse, features = c("Vim", "B2m", "Cst3", "Tmsb4x", "Ftl1"))
cluster1.markers <- FindMarkers(mouse, ident.1 = 1, min.pct = 0.25)
head(cluster1.markers, n = 5)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Rpl7 0 0.5360234 0.989 0.931 0
## Rpl31 0 0.5888139 0.998 0.974 0
## Col3a1 0 0.6539569 1.000 0.960 0
## Col5a2 0 0.6903938 0.991 0.760 0
## Fn1 0 0.6744519 0.962 0.725 0
VlnPlot(mouse, features = c("Rpl7", "Rpl31", "Col3a1", "Col5a2", "Spats2l"))
cluster2.markers <- FindMarkers(mouse, ident.1 = 2, min.pct = 0.25)
head(cluster2.markers, n = 5)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Col3a1 0 -1.8095141 0.993 0.963 0
## Des 0 0.6945509 0.463 0.054 0
## Serpine2 0 1.1771494 0.883 0.388 0
## Rgs5 0 2.2070748 0.957 0.216 0
## Rgs4 0 0.3445950 0.256 0.049 0
VlnPlot(mouse, features = c("Col3a1", "Des", "Serpine2", "Rgs5", "Rgs4"))
cluster3.markers <- FindMarkers(mouse, ident.1 = 3, min.pct = 0.25)
head(cluster3.markers, n = 5)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Col3a1 0 1.3361046 1.000 0.962 0
## Col5a2 0 0.9233501 0.992 0.768 0
## Fmod 0 0.5839398 0.351 0.064 0
## Angptl1 0 0.5077047 0.357 0.073 0
## Dpt 0 0.9019990 0.765 0.325 0
VlnPlot(mouse, features = c("Col3a1", "Col5a2", "Selp", "F11r", "Cd34"))
“Selp”, “F11r”, “Cd34” very significant
cluster4.markers <- FindMarkers(mouse, ident.1 = 4, min.pct = 0.25)
head(cluster4.markers, n = 5)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Col3a1 0 -1.4833523 0.894 0.974 0
## Col5a2 0 -1.0876949 0.479 0.825 0
## Selp 0 0.9678140 0.305 0.013 0
## F11r 0 0.5397448 0.352 0.018 0
## Cd34 0 0.9272157 0.598 0.084 0
VlnPlot(mouse, features = c("Col3a1", "Mdk", "Tmsb4x", "Eln", "Col1a2"))
cluster5.markers <- FindMarkers(mouse, ident.1 = 5, ident.2 = c(0, 3), min.pct = 0.25)
head(cluster5.markers, n = 5)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## Fcer1g 0 1.2996233 0.699 0.154 0
## Il1b 0 1.5218234 0.721 0.269 0
## Ctss 0 0.6950059 0.416 0.047 0
## Laptm5 0 0.5755497 0.395 0.043 0
## Cd52 0 1.1293071 0.659 0.114 0
VlnPlot(mouse, features = c("Fcer1g", "Il1b", "Ctss", "Laptm5", "Cd52"))
aus Paper supplm
VlnPlot(mouse, features = c("Crabp1", "Des", "Rgs5", "C1qb", "Pf4", "Eln"))
VlnPlot(mouse, features = c("Ogn", "Cd93", "Pecam1","Birc5", "Ccnb2", "Icos"))
VlnPlot(mouse, features = c("Nkg7", "Ccr7", "H2-DMb1", "Hdc", "G0s2", "Cadm4"))
VlnPlot(mouse, features = c("Itih5", "Hba-a2", "Hbb-bs", "Acp5", "Mmp9", "Ccl21a"))
VlnPlot(mouse, features = c("Lyve1"))
FeaturePlot(mouse, features = c("Sparc", "Tyrobp", "Igfbp7", "Pecam1", "Cd34", "Mfap4", "Ndufa4l2", "Il6",
"Birc5"))
mouse.markers %>%
group_by(cluster) %>%
top_n(n = 10, wt = avg_log2FC) -> top10
DoHeatmap(mouse, features = top10$gene) + NoLegend()
new.cluster.ids <- c("0", "FIB-1", "FIB-2", "ENDO", "FIB-3", "5", "MYL", "7", "8", "9", "10", "SCH", "DEN")
names(new.cluster.ids) <- levels(mouse)
mouse <- RenameIdents(mouse, new.cluster.ids)
DimPlot(mouse, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()
#FindConservedMarkers(mouse, idents.1= [nr.] ) #Idents(mouse) #RenameIdents(mouse, ‘[nr.]’= [celltype] )